load("/home/rstudio/22-11-23_IfnKO-integration.Rdata")
volcano_ylims <- c(0, 25)
volcano_xlims <- c(-1, 1)
dnmt_df %>%
dplyr::filter(experiment != "naive (Ifnagr KO)" & !is.na(meth)) %>%
ggplot(aes(x = tissue, y = meth, color=tissue, group=tissue)) +
rasterise(ggbeeswarm::geom_quasirandom(size=.2)) +
stat_summary(fun.data=median_hilow, geom="errorbar", width=0, size=.5, color="black", fun.args=c(conf.int=.5)) +
stat_summary(fun=median, geom="point", size=2, color="black") +
scale_y_continuous(labels=scales::percent_format(accuracy=1L), expand=c(.005,.005)) +
scale_color_manual(values=c("vSVZ"="#0047b9", "striatum"="#78b2ff")) +
facet_wrap(~experiment, nrow=1) +
labs(x = "", y = d$label) +
theme(panel.grid = element_blank(), legend.position = "none",
panel.border = element_rect(colour="black", fill=NA, size=1),
axis.text.x = element_text(angle=45, vjust=1, hjust=1))

ggplot2::ggsave("/home/rstudio/22-12-07_Dnmt3a-meth-beeswarm.png", width=12, height=7, units="cm", dpi=500)
ggplot2::ggsave("/home/rstudio/22-12-07_Dnmt3a-meth-beeswarm.svg", width=12, height=7, units="cm", fix_text_size=F)
tmp <- quant_df_all %>%
mutate(cell_id_dna = unname(cellname_to_dna_id[sample])) %>%
dplyr::filter(experiment %in% c("naive", "2 dpi", "21 dpi", "2 dpi (Ifnagr KO)") &
method=="scNMT" &
tissue == "vSVZ" &
!is.na(cell_id_dna) &
cell_id_dna %in% rownames(dmr21dpi_mtx)) %>%
dplyr::filter(!(experiment == "naive" & is.na(ptime))) %>% # remove naive non-lineage cells
group_by(experiment) %>%
mutate(n_bins = floor(n() / cells_per_small_bin)) %>% # this ensures we get about 10-12 cells per bin
ungroup() %>%
mutate(n_bins = if_else(n_bins < 1, 1, n_bins), # bins of size 0 don't make sense, need at least 1 bin
ptime = if_else(is.na(ptime), rnorm(mean=-100, n=n()), ptime))
# cut_number doesnt work with group_by, so I group manually:
quant_df_binned_svz <- bind_rows(
tmp %>%
dplyr::filter(experiment == "naive") %>%
mutate(ptime_bin = paste0("n_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>%
arrange(ptime_bin),
tmp %>%
dplyr::filter(experiment == "2 dpi") %>%
mutate(ptime_bin = paste0("i2_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>%
arrange(ptime_bin),
tmp %>%
dplyr::filter(experiment == "21 dpi") %>%
mutate(ptime_bin = paste0("i21_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>%
arrange(ptime_bin),
tmp %>%
dplyr::filter(experiment == "2 dpi (Ifnagr KO)") %>%
mutate(ptime_bin = paste0("ko_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>%
arrange(ptime_bin)
) %>%
group_by(ptime_bin) %>%
add_count(celltype_tissue, name="n") %>%
mutate(most_common = celltype_tissue[n == max(n)][1]) %>% # the most common celltype in that bin
ungroup() %>%
arrange(experiment, ptime)
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
tmp <- quant_df_all %>%
mutate(cell_id_dna = unname(cellname_to_dna_id[sample])) %>%
dplyr::filter(experiment %in% c("naive", "2 dpi", "21 dpi", "2 dpi (Ifnagr KO)") &
method=="scNMT" &
tissue == "striatum" &
!is.na(cell_id_dna) &
cell_id_dna %in% rownames(dmr21dpi_mtx)) %>%
dplyr::filter(!(experiment == "naive" & is.na(ptime))) %>% # remove naive non-lineage cells
group_by(experiment) %>%
mutate(n_bins = floor(n() / cells_per_small_bin)) %>% # this ensures we get about 10-12 cells per bin
ungroup() %>%
mutate(n_bins = if_else(n_bins < 1, 1, n_bins), # bins of size 0 don't make sense, need at least 1 bin
ptime = if_else(is.na(ptime), rnorm(mean=-100, n=n()), ptime))
# cut_number doesnt work with group_by, so I group manually:
quant_df_binned_str <- bind_rows(
tmp %>%
dplyr::filter(experiment == "naive") %>%
mutate(ptime_bin = paste0("n_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>%
arrange(ptime_bin),
tmp %>%
dplyr::filter(experiment == "2 dpi") %>%
mutate(ptime_bin = paste0("i2_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>%
arrange(ptime_bin),
tmp %>%
dplyr::filter(experiment == "21 dpi") %>%
mutate(ptime_bin = paste0("i21_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>%
arrange(ptime_bin),
tmp %>%
dplyr::filter(experiment == "2 dpi (Ifnagr KO)") %>%
mutate(ptime_bin = paste0("ko_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>%
arrange(ptime_bin)
) %>%
group_by(ptime_bin) %>%
add_count(celltype_tissue, name="n") %>%
mutate(most_common = celltype_tissue[n == max(n)][1]) %>% # the most common celltype in that bin
ungroup() %>%
arrange(experiment, ptime)
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
rm(tmp)
DMRs found in the lineage after 2 days of ischemia
g1 <- lmr_df %>%
dplyr::filter(treatment == "naive" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>%
pull(sample)
g2 <- lmr_df %>%
dplyr::filter(treatment == "2 dpi" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>%
pull(sample)
lmr_df %>%
dplyr::filter(genotype != "Ifnagr KO") %>%
slice_sample(prop=1) %>% # randomize overplotting ...
arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
mutate(selection = case_when(
is.na(meth_score) ~ "NA",
sample %in% g1 ~ "g1",
sample %in% g2 ~ "g2",
T ~ "-"
)) %>%
ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") + # 0.1
geom_point_rast(size=1.5, stroke=.2, shape=21) + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
plot.background = element_blank(), strip.background = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1))

p_selection <- lmr_df %>%
dplyr::filter(tissue == "vSVZ" & experiment %in% c("naive", "2 dpi")) %>%
slice_sample(prop=1) %>% # randomize overplotting ...
arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
mutate(selection = case_when(
is.na(meth_score) ~ "NA",
sample %in% g1 ~ "g1",
sample %in% g2 ~ "g2",
T ~ "-"
)) %>%
ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") + # 0.1
geom_point_rast(size=1.5, stroke=.2, shape=21) + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
plot.background = element_blank(), strip.background = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1), legend.position="none")
p_selection

# Write this file as input to scbs scan
# bind_rows(
# dna_info %>% dplyr::filter(sample %in% g1) %>% mutate(group = "q2toTAP_naive"),
# dna_info %>% dplyr::filter(sample %in% g2) %>% mutate(group = "q2toTAP_2dpi")
# ) %>%
# mutate(cell_id_dna = paste0(cell_id_dna, "_R1R2_dedup.NOMe.CpG")) %>%
# dplyr::select(cell_id_dna, group) %>%
# write_csv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/q2toTAP-naive-vs-2dpi_labels.csv",
# col_names = F)
dmr_df <- read_tsv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-2dpi_bw2000_ss100_t0.05_mc6_annotated.tsv",
show_col_types=F)
dmr_df_labels <- dmr_df %>%
dplyr::filter(padj < .05 & overlaps_gene) %>% # n_obs_fg > 25 & n_obs_bg > 25 &
slice_min(p, n=10) %>%
arrange(p)
p_volcano <- dmr_df %>%
dplyr::filter(overlaps_gene) %>% # n_obs_fg > 25 & n_obs_bg > 25 &
ggplot(aes(x = -diff, y = -log10(p), color = padj < .05, label = gene_symbol)) +
geom_point_rast(size=.3) +
geom_text_repel(data=dmr_df_labels, max.overlaps=Inf, size=3, color="black",
segment.angle=20, segment.curvature=-0.1, segment.ncp=3, segment.color="gray50", segment.size=.3) +
scale_x_continuous(labels = scales::percent_format(accuracy=1), minor_breaks = NULL,
limits=volcano_xlims) +
scale_y_continuous(limits=volcano_ylims) +
labs(x = "difference in DNA methylation", y = "-log10(p-value)", color = "") +
theme(panel.grid.minor = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=1),
legend.position = "none")
p_volcano
## Warning: Removed 7 rows containing missing values (geom_point).

hm_df <- dmr_df %>%
dplyr::filter(padj < .05) %>%
arrange(direction)
dmr_mtx <- data.table::fread(
"/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-2dpi_bw2000_ss100_t0.05_mc6_a0.1-matrices/methylation_fractions.csv.gz",
data.table = F) %>%
column_to_rownames(var="V1") %>%
as.matrix() %>%
magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>%
magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>%
magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)])) %>%
magrittr::extract( , hm_df$dmr)
bin_df <- quant_df_binned_str %>%
distinct(ptime_bin, experiment, most_common)
bin_ctypes <- bin_df %>%
pull(most_common)
ha <- HeatmapAnnotation(celltype = bin_ctypes,
col = list(celltype = mycolorscheme2),
show_legend=F,
simple_anno_size=unit(2,"mm"),
show_annotation_name=F)
meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
idx <- dplyr::filter(quant_df_binned_str, ptime_bin == b) %>% pull(sample) %>% unique()
colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
magrittr::set_rownames(hm_df$dmr) %>%
magrittr::set_colnames(bin_df$ptime_bin)
hm_str <- meth_mtx_ptime_bins %>%
t() %>% scale() %>% t() %>%
Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
show_row_names=F, show_column_names=F,
name="meth", bottom_annotation=ha,
width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
row_split=paste(hm_df$direction, "LMRs", sep="\n"),
column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
color_viridis(., option="inferno", direction=-1L))
hm_str

bin_df <- quant_df_binned_svz %>%
distinct(ptime_bin, experiment, most_common)
bin_ctypes <- bin_df %>%
pull(most_common)
ha <- HeatmapAnnotation(celltype = bin_ctypes,
col = list(celltype = mycolorscheme2),
show_legend=F,
simple_anno_size=unit(2,"mm"),
show_annotation_name=F)
meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
idx <- dplyr::filter(quant_df_binned_svz, ptime_bin == b) %>% pull(sample) %>% unique()
colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
magrittr::set_rownames(hm_df$dmr) %>%
magrittr::set_colnames(bin_df$ptime_bin)
hm_svz <- meth_mtx_ptime_bins %>%
t() %>% scale() %>% t() %>%
Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
show_row_names=F, show_column_names=F,
name="meth", bottom_annotation=ha,
width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
row_split=paste(hm_df$direction, "LMRs", sep="\n"),
column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
color_viridis(., option="inferno", direction=-1L))
hm_svz

p <- (p_selection + p_volcano + plot_layout(widths=c(6, 10)) ) /
grid.grabExpr(draw(hm_svz)) +
grid.grabExpr(draw(hm_str)) +
plot_layout(heights=c(2, 5, 5))
svglite("/home/rstudio/23-02-10_2dpi-lineage-DMRs.svg", width=unit(6,"cm"), height=unit(15,"cm"), fix_text_size=F)
show(p)
## Warning: Removed 7 rows containing missing values (geom_point).
dev.off()
## png
## 2
DMRs found in the vSVZ lineage after 21 days of ischemia
g1 <- lmr_df %>%
dplyr::filter(treatment == "naive" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>%
pull(sample)
g2 <- lmr_df %>%
dplyr::filter(treatment == "21 dpi" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>%
pull(sample)
lmr_df %>%
dplyr::filter(genotype != "Ifnagr KO") %>%
slice_sample(prop=1) %>% # randomize overplotting ...
arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
mutate(selection = case_when(
is.na(meth_score) ~ "NA",
sample %in% g1 ~ "g1",
sample %in% g2 ~ "g2",
T ~ "-"
)) %>%
ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") + # 0.1
geom_point_rast(size=1.5, stroke=.2, shape=21) + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
plot.background = element_blank(), strip.background = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1))

p_selection <- lmr_df %>%
dplyr::filter(tissue == "vSVZ" & experiment %in% c("naive", "21 dpi")) %>%
slice_sample(prop=1) %>% # randomize overplotting ...
arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
mutate(selection = case_when(
is.na(meth_score) ~ "NA",
sample %in% g1 ~ "g1",
sample %in% g2 ~ "g2",
T ~ "-"
)) %>%
ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") + # 0.1
geom_point_rast(size=1.5, stroke=.2, shape=21) + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
plot.background = element_blank(), strip.background = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1), legend.position="none")
p_selection

# Write this file as input to scbs scan
# bind_rows(
# dna_info %>% dplyr::filter(sample %in% g1) %>% mutate(group = "q2toTAP_naive"),
# dna_info %>% dplyr::filter(sample %in% g2) %>% mutate(group = "q2toTAP_2dpi")
# ) %>%
# mutate(cell_id_dna = paste0(cell_id_dna, "_R1R2_dedup.NOMe.CpG")) %>%
# dplyr::select(cell_id_dna, group) %>%
# write_csv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/q2toTAP-naive-vs-21dpi_labels.csv",
# col_names = F)
dmr_df <- read_tsv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-21dpi_bw2000_ss100_t0.05_mc6_annotated.tsv",
show_col_types=F)
dmr_df_labels <- dmr_df %>%
dplyr::filter(padj < .05 & overlaps_gene) %>% # & n_obs_fg > 25 & n_obs_bg > 25
slice_min(p, n=10) %>%
arrange(p)
p_volcano <- dmr_df %>%
dplyr::filter(overlaps_gene) %>% # & n_obs_fg > 25 & n_obs_bg > 25
ggplot(aes(x = -diff, y = -log10(p), color = padj < .05, label = gene_symbol)) +
geom_point_rast(size=.3) +
geom_text_repel(data=dmr_df_labels, max.overlaps=Inf, size=3, color="black",
segment.angle=20, segment.curvature=-0.1, segment.ncp=3, segment.color="gray50", segment.size=.3) +
scale_x_continuous(labels = scales::percent_format(accuracy=1), minor_breaks = NULL,
limits=volcano_xlims) +
scale_y_continuous(limits=volcano_ylims) +
labs(x = "difference in DNA methylation", y = "-log10(p-value)", color = "") +
theme(panel.grid.minor = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=1),
legend.position = "none")
p_volcano
## Warning: Removed 7 rows containing missing values (geom_point).

hm_df <- dmr_df %>%
dplyr::filter(padj < .05) %>%
arrange(direction)
dmr_mtx <- data.table::fread(
"/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-21dpi_bw2000_ss100_t0.05_mc6_a0.1-matrices/methylation_fractions.csv.gz",
data.table = F) %>%
column_to_rownames(var="V1") %>%
as.matrix() %>%
magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>%
magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>%
magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)])) %>%
magrittr::extract( , hm_df$dmr)
bin_df <- quant_df_binned_str %>%
distinct(ptime_bin, experiment, most_common)
bin_ctypes <- bin_df %>%
pull(most_common)
ha <- HeatmapAnnotation(celltype = bin_ctypes,
col = list(celltype = mycolorscheme2),
show_legend=F,
simple_anno_size=unit(2,"mm"),
show_annotation_name=F)
meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
idx <- dplyr::filter(quant_df_binned_str, ptime_bin == b) %>% pull(sample) %>% unique()
colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
magrittr::set_rownames(hm_df$dmr) %>%
magrittr::set_colnames(bin_df$ptime_bin)
hm_str <- meth_mtx_ptime_bins %>%
t() %>% scale() %>% t() %>%
Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
show_row_names=F, show_column_names=F,
name="meth", bottom_annotation=ha,
width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
row_split=paste(hm_df$direction, "LMRs", sep="\n"),
column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
color_viridis(., option="inferno", direction=-1L))
hm_str

bin_df <- quant_df_binned_svz %>%
distinct(ptime_bin, experiment, most_common)
bin_ctypes <- bin_df %>%
pull(most_common)
ha <- HeatmapAnnotation(celltype = bin_ctypes,
col = list(celltype = mycolorscheme2),
show_legend=F,
simple_anno_size=unit(2,"mm"),
show_annotation_name=F)
meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
idx <- dplyr::filter(quant_df_binned_svz, ptime_bin == b) %>% pull(sample) %>% unique()
colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
magrittr::set_rownames(hm_df$dmr) %>%
magrittr::set_colnames(bin_df$ptime_bin)
hm_svz <- meth_mtx_ptime_bins %>%
t() %>% scale() %>% t() %>%
Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
show_row_names=F, show_column_names=F,
name="meth", bottom_annotation=ha,
width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
row_split=paste(hm_df$direction, "LMRs", sep="\n"),
column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
color_viridis(., option="inferno", direction=-1L))
hm_svz

p <- (p_selection + p_volcano + plot_layout(widths=c(6, 10)) ) /
grid.grabExpr(draw(hm_svz)) +
grid.grabExpr(draw(hm_str)) +
plot_layout(heights=c(2, 5, 5))
svglite("/home/rstudio/23-02-10_21dpi-lineage-DMRs.svg", width=unit(6,"cm"), height=unit(15,"cm"), fix_text_size=F)
show(p)
## Warning: Removed 7 rows containing missing values (geom_point).
dev.off()
## png
## 2
DMRs found in the non-neurogenic reactive astrocytes 2dpi (vSVZ +
Striatum combined)
g1 <- lmr_df %>%
dplyr::filter(treatment == "naive" & celltype_tissue %in% c("astrocyte (striatum)", "qNSC1") &
genotype == "wt" & !is.na(meth_score)) %>%
pull(sample)
g2 <- lmr_df %>%
dplyr::filter(treatment == "2 dpi" & celltype_tissue %in% c("reactive astrocyte") &
genotype == "wt" & !is.na(meth_score)) %>%
pull(sample)
lmr_df %>%
dplyr::filter(genotype != "Ifnagr KO") %>%
slice_sample(prop=1) %>% # randomize overplotting ...
arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
mutate(selection = case_when(
is.na(meth_score) ~ "NA",
sample %in% g1 ~ "g1",
sample %in% g2 ~ "g2",
T ~ "-"
)) %>%
ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") + # 0.1
geom_point_rast(size=1.5, stroke=.2, shape=21) + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
plot.background = element_blank(), strip.background = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1))

p_selection <- lmr_df %>%
dplyr::filter(experiment %in% c("naive", "2 dpi")) %>%
slice_sample(prop=1) %>% # randomize overplotting ...
arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
mutate(selection = case_when(
is.na(meth_score) ~ "NA",
sample %in% g1 ~ "g1",
sample %in% g2 ~ "g2",
T ~ "-"
),
tissue = "vSVZ + striatum") %>%
ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") + # 0.1
geom_point_rast(size=1.5, stroke=.2, shape=21) + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
plot.background = element_blank(), strip.background = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1), legend.position="none")
p_selection

# Write this file as input to scbs scan
# bind_rows(
# dna_info %>% dplyr::filter(sample %in% g1) %>% mutate(group = "q1-SVZStr-naive"),
# dna_info %>% dplyr::filter(sample %in% g2) %>% mutate(group = "RA-SVZStr-2dpi")
# ) %>%
# mutate(cell_id_dna = paste0(cell_id_dna, "_R1R2_dedup.NOMe.CpG")) %>%
# dplyr::select(cell_id_dna, group) %>%
# write_csv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/q1-SVZStr-naive-vs-RA-SVZStr-2dpi_labels.csv",
# col_names = F)
dmr_df <- read_tsv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q1-SVZStr-naive-vs-RA-SVZStr-2dpi_bw2000_ss100_t0.05_mc6_annotated.tsv",
show_col_types=F)
dmr_df_labels <- dmr_df %>%
dplyr::filter(padj < .05 & overlaps_gene) %>% # & n_obs_fg > 25 & n_obs_bg > 25
slice_min(p, n=10) %>%
arrange(p)
p_volcano <- dmr_df %>%
dplyr::filter(overlaps_gene) %>% # & n_obs_fg > 25 & n_obs_bg > 25
ggplot(aes(x = -diff, y = -log10(p), color = padj < .05, label = gene_symbol)) +
geom_point_rast(size=.3) +
geom_text_repel(data=dmr_df_labels, max.overlaps=Inf, size=3, color="black",
segment.angle=20, segment.curvature=-0.1, segment.ncp=3, segment.color="gray50", segment.size=.3) +
scale_x_continuous(labels = scales::percent_format(accuracy=1), minor_breaks = NULL,
limits=volcano_xlims) +
scale_y_continuous(limits=volcano_ylims) +
labs(x = "difference in DNA methylation", y = "-log10(p-value)", color = "") +
theme(panel.grid.minor = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=1),
legend.position = "none")
p_volcano
## Warning: Removed 2 rows containing missing values (geom_point).

hm_df <- dmr_df %>%
dplyr::filter(padj < .05) %>%
arrange(direction)
dmr_mtx <- data.table::fread(
"/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q1-SVZStr-naive-vs-RA-SVZStr-2dpi_bw2000_ss100_t0.05_mc6_a0.1-matrices/methylation_fractions.csv.gz",
data.table = F) %>%
column_to_rownames(var="V1") %>%
as.matrix() %>%
magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>%
magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>%
magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)])) %>%
magrittr::extract( , hm_df$dmr)
bin_df <- quant_df_binned_str %>%
distinct(ptime_bin, experiment, most_common)
bin_ctypes <- bin_df %>%
pull(most_common)
ha <- HeatmapAnnotation(celltype = bin_ctypes,
col = list(celltype = mycolorscheme2),
show_legend=F,
simple_anno_size=unit(2,"mm"),
show_annotation_name=F)
meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
idx <- dplyr::filter(quant_df_binned_str, ptime_bin == b) %>% pull(sample) %>% unique()
colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
magrittr::set_rownames(hm_df$dmr) %>%
magrittr::set_colnames(bin_df$ptime_bin)
hm_str <- meth_mtx_ptime_bins %>%
t() %>% scale() %>% t() %>%
Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
show_row_names=F, show_column_names=F,
name="meth", bottom_annotation=ha,
width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
row_split=paste(hm_df$direction, "LMRs", sep="\n"),
column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
color_viridis(., option="inferno", direction=-1L))
hm_str

bin_df <- quant_df_binned_svz %>%
distinct(ptime_bin, experiment, most_common)
bin_ctypes <- bin_df %>%
pull(most_common)
ha <- HeatmapAnnotation(celltype = bin_ctypes,
col = list(celltype = mycolorscheme2),
show_legend=F,
simple_anno_size=unit(2,"mm"),
show_annotation_name=F)
meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
idx <- dplyr::filter(quant_df_binned_svz, ptime_bin == b) %>% pull(sample) %>% unique()
colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
magrittr::set_rownames(hm_df$dmr) %>%
magrittr::set_colnames(bin_df$ptime_bin)
hm_svz <- meth_mtx_ptime_bins %>%
t() %>% scale() %>% t() %>%
Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
show_row_names=F, show_column_names=F,
name="meth", bottom_annotation=ha,
width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
row_split=paste(hm_df$direction, "LMRs", sep="\n"),
column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
color_viridis(., option="inferno", direction=-1L))
hm_svz

p <- (p_selection + p_volcano + plot_layout(widths=c(6, 10)) ) /
grid.grabExpr(draw(hm_svz)) +
grid.grabExpr(draw(hm_str)) +
plot_layout(heights=c(2, 5, 5))
svglite("/home/rstudio/23-02-10_RA-DMRs.svg", width=unit(6,"cm"), height=unit(15,"cm"), fix_text_size=F)
show(p)
## Warning: Removed 2 rows containing missing values (geom_point).
dev.off()
## png
## 2
“Control”: DMRs found in the non-neurogenic reactive astrocytes 2dpi
(vSVZ + Striatum combined)
g1 <- lmr_df %>%
dplyr::filter(treatment == "naive" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>%
pull(sample)
g2 <- lmr_df %>%
dplyr::filter(treatment == "2 dpi" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
tissue == "vSVZ" & genotype == "Ifnagr KO" & !is.na(meth_score)) %>%
pull(sample)
lmr_df %>%
slice_sample(prop=1) %>% # randomize overplotting ...
arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
mutate(selection = case_when(
is.na(meth_score) ~ "NA",
sample %in% g1 ~ "g1",
sample %in% g2 ~ "g2",
T ~ "-"
)) %>%
ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") + # 0.1
geom_point_rast(size=1.5, stroke=.2, shape=21) + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
plot.background = element_blank(), strip.background = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1))

p_selection <- lmr_df %>%
dplyr::filter(experiment %in% c("naive", "2 dpi (Ifnagr KO)")) %>%
slice_sample(prop=1) %>% # randomize overplotting ...
arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
mutate(selection = case_when(
is.na(meth_score) ~ "NA",
sample %in% g1 ~ "g1",
sample %in% g2 ~ "g2",
T ~ "-"
),
tissue = "vSVZ + striatum") %>%
ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") + # 0.1
geom_point_rast(size=1.5, stroke=.2, shape=21) + # 0.5
facet_grid(tissue ~ experiment) +
coord_fixed() +
labs(x = "UMAP1", y = "UMAP2") +
scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
plot.background = element_blank(), strip.background = element_blank(),
panel.border = element_rect(colour="black", fill=NA, size=1), legend.position="none")
p_selection

# Write this file as input to scbs scan
# bind_rows(
# dna_info %>% dplyr::filter(sample %in% g1) %>% mutate(group = "q1-SVZStr-naive"),
# dna_info %>% dplyr::filter(sample %in% g2) %>% mutate(group = "RA-SVZStr-2dpi")
# ) %>%
# mutate(cell_id_dna = paste0(cell_id_dna, "_R1R2_dedup.NOMe.CpG")) %>%
# dplyr::select(cell_id_dna, group) %>%
# write_csv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/q1-SVZStr-naive-vs-RA-SVZStr-2dpi_labels.csv",
# col_names = F)
dmr_df <- read_tsv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-2dpiKO_bw2000_ss100_t0.05_mc6_annotated.tsv",
show_col_types=F)
dmr_df_labels <- dmr_df %>%
dplyr::filter(padj < .05 & overlaps_gene) %>% # & n_obs_fg > 25 & n_obs_bg > 25
slice_min(p, n=10) %>%
arrange(p)
p_volcano <- dmr_df %>%
dplyr::filter(overlaps_gene) %>% # & n_obs_fg > 25 & n_obs_bg > 25
ggplot(aes(x = -diff, y = -log10(p), color = padj < .05, label = gene_symbol)) +
geom_point_rast(size=.3) +
geom_text_repel(data=dmr_df_labels, max.overlaps=Inf, size=3, color="black",
segment.angle=20, segment.curvature=-0.1, segment.ncp=3, segment.color="gray50", segment.size=.3) +
scale_x_continuous(labels = scales::percent_format(accuracy=1), minor_breaks = NULL,
limits=volcano_xlims) +
scale_y_continuous(limits=volcano_ylims) +
labs(x = "difference in DNA methylation", y = "-log10(p-value)", color = "") +
theme(panel.grid.minor = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=1),
legend.position = "none")
p_volcano
## Warning: Removed 6 rows containing missing values (geom_point).

hm_df <- dmr_df %>%
dplyr::filter(padj < .05) %>%
arrange(direction)
dmr_mtx <- data.table::fread(
"/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-2dpiKO_bw2000_ss100_t0.05_mc6_a0.1-matrices/methylation_fractions.csv.gz",
data.table = F) %>%
column_to_rownames(var="V1") %>%
as.matrix() %>%
magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>%
magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>%
magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)])) %>%
magrittr::extract( , hm_df$dmr)
bin_df <- quant_df_binned_str %>%
distinct(ptime_bin, experiment, most_common)
bin_ctypes <- bin_df %>%
pull(most_common)
ha <- HeatmapAnnotation(celltype = bin_ctypes,
col = list(celltype = mycolorscheme2),
show_legend=F,
simple_anno_size=unit(2,"mm"),
show_annotation_name=F)
meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
idx <- dplyr::filter(quant_df_binned_str, ptime_bin == b) %>% pull(sample) %>% unique()
colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
magrittr::set_rownames(hm_df$dmr) %>%
magrittr::set_colnames(bin_df$ptime_bin)
hm_str <- meth_mtx_ptime_bins %>%
t() %>% scale() %>% t() %>%
Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
show_row_names=F, show_column_names=F,
name="meth", bottom_annotation=ha,
width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
row_split=paste(hm_df$direction, "LMRs", sep="\n"),
column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
color_viridis(., option="inferno", direction=-1L))
hm_str

bin_df <- quant_df_binned_svz %>%
distinct(ptime_bin, experiment, most_common)
bin_ctypes <- bin_df %>%
pull(most_common)
ha <- HeatmapAnnotation(celltype = bin_ctypes,
col = list(celltype = mycolorscheme2),
show_legend=F,
simple_anno_size=unit(2,"mm"),
show_annotation_name=F)
meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
idx <- dplyr::filter(quant_df_binned_svz, ptime_bin == b) %>% pull(sample) %>% unique()
colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
magrittr::set_rownames(hm_df$dmr) %>%
magrittr::set_colnames(bin_df$ptime_bin)
hm_svz <- meth_mtx_ptime_bins %>%
t() %>% scale() %>% t() %>%
Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
show_row_names=F, show_column_names=F,
name="meth", bottom_annotation=ha,
width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
row_split=paste(hm_df$direction, "LMRs", sep="\n"),
column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
color_viridis(., option="inferno", direction=-1L))
hm_svz

p <- (p_selection + p_volcano + plot_layout(widths=c(6, 10)) ) /
grid.grabExpr(draw(hm_svz)) +
grid.grabExpr(draw(hm_str)) +
plot_layout(heights=c(2, 5, 5))
svglite("/home/rstudio/23-02-10_2dpiKO-DMRs.svg", width=unit(6,"cm"), height=unit(15,"cm"), fix_text_size=F)
show(p)
## Warning: Removed 6 rows containing missing values (geom_point).
dev.off()
## png
## 2